# ####################################
# SpeechUT Base model #
# ####################################
# Uncomment to restrict the run to specific GPUs.
# NOTE(review): the device count here presumably has to agree with world_size
# below -- confirm before changing either one.
#export CUDA_VISIBLE_DEVICES=0,1,2,3

# Activate the default conda environment.
# NOTE(review): `conda activate` normally needs the conda shell hook to be
# loaded in the current shell -- confirm how this script is invoked.
conda activate

# --- Run identity ----------------------------------------------------------
# Name of this fine-tuning run; it becomes the last component of MODEL_DIR.
JOB="job25"
# Target-language code; used to pick config_en<lang>.yaml at launch time.
lang="de"

# --- Locations -------------------------------------------------------------
# Checkout root that contains the fairseq/ tree used to launch training.
CODE_ROOT="/data0/SpeechUT"
# Dataset directory handed to train.py as its positional data argument.
DATA_DIR="/data0/SpeechUT/dataset/MuSTC/en_de"
# Directory where checkpoints for this run are saved.
MODEL_DIR="/data0/SpeechUT/finetune_mustc/en_de/${JOB}"
# Checkpoint file passed to the model via --w2v-path.
w2v_path="/data0/SpeechUT/model/checkpoint_217_400000.pt"

# --- Sizing ----------------------------------------------------------------
# Shared value for --max-tokens, --max-tokens-valid and --max-source-positions.
max_tokens="800000"
# Value passed as --distributed-world-size.
world_size="4"


# Launch fairseq's train.py on DATA_DIR, saving checkpoints to MODEL_DIR.
#
# Reads (set earlier in this script): CODE_ROOT, DATA_DIR, MODEL_DIR, lang,
# world_size, max_tokens, w2v_path.
#
# Notes:
#   * Every expansion is double-quoted so a path containing whitespace or glob
#     characters reaches train.py as a single argument.
#   * --user-dir and --tensorboard-logdir are derived from CODE_ROOT so that
#     relocating the checkout only requires changing that one variable.
#   * The final option line intentionally has NO trailing backslash; a
#     dangling continuation would absorb whatever line follows the command.
#   * max_tokens is reused for --max-tokens, --max-tokens-valid and
#     --max-source-positions.
#   * The best checkpoint is selected by maximising the "accuracy" metric.
#   * NOTE(review): the criterion name "label_smoothed_cross_entropyrdd" is
#     presumably registered by the code under --user-dir -- confirm that it is
#     not a typo before changing it.
python "${CODE_ROOT}/fairseq/fairseq_cli/train.py" "${DATA_DIR}" \
    --save-dir "${MODEL_DIR}" \
    --user-dir "${CODE_ROOT}/speechut" \
    --task speech_to_text \
    --config-yaml "config_en${lang}.yaml" \
    --train-subset "train_st" \
    --valid-subset "dev_st" \
    --fp16 \
    --seed 1 \
    --ddp-backend "no_c10d" \
    --distributed-world-size "${world_size}" \
    --tensorboard-logdir "${CODE_ROOT}/speechut/tensorboard" \
    --criterion label_smoothed_cross_entropyrdd --report-accuracy \
    --label-smoothing 0.3 \
    --optimizer adam \
    --clip-norm 1.0 \
    --lr 3e-05 \
    --lr-scheduler polynomial_decay --warmup-updates 5000 \
    --max-update 50000 \
    --total-num-update 50000 \
    --update-freq 2 \
    --max-tokens "${max_tokens}" \
    --max-sentences 16 \
    --max-tokens-valid "${max_tokens}" \
    --grouped-shuffling \
    --max-source-positions "${max_tokens}" \
    --skip-invalid-size-inputs-valid-test \
    --num-workers 4 \
    --best-checkpoint-metric "accuracy" \
    --maximize-best-checkpoint-metric \
    --arch "speechut_st_legacy" \
    --w2v-path "${w2v_path}" \
    --layerdrop 0.1 \
    --activation-dropout 0.1 \
    --attention-dropout 0.1 \
    --feature-grad-mult 1.0 \
    --apply-mask --mask-prob 0.5 \
    --log-format "json" \
    --log-interval 100 \
    --save-interval-updates 1000 \
    --save-interval 1 \
    --keep-last-epochs 5 \
    --keep-best-checkpoints 10

